{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "075f9c39",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "train = pd.read_csv('train.csv')\n",
    "test = pd.read_csv('test.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "c8509770",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Id</th>\n",
       "      <th>MSSubClass</th>\n",
       "      <th>MSZoning</th>\n",
       "      <th>LotFrontage</th>\n",
       "      <th>LotArea</th>\n",
       "      <th>Street</th>\n",
       "      <th>Alley</th>\n",
       "      <th>LotShape</th>\n",
       "      <th>LandContour</th>\n",
       "      <th>Utilities</th>\n",
       "      <th>...</th>\n",
       "      <th>PoolArea</th>\n",
       "      <th>PoolQC</th>\n",
       "      <th>Fence</th>\n",
       "      <th>MiscFeature</th>\n",
       "      <th>MiscVal</th>\n",
       "      <th>MoSold</th>\n",
       "      <th>YrSold</th>\n",
       "      <th>SaleType</th>\n",
       "      <th>SaleCondition</th>\n",
       "      <th>SalePrice</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>60</td>\n",
       "      <td>RL</td>\n",
       "      <td>65.0</td>\n",
       "      <td>8450</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Reg</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2008</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "      <td>208500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>20</td>\n",
       "      <td>RL</td>\n",
       "      <td>80.0</td>\n",
       "      <td>9600</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Reg</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>2007</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "      <td>181500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>60</td>\n",
       "      <td>RL</td>\n",
       "      <td>68.0</td>\n",
       "      <td>11250</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>IR1</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>9</td>\n",
       "      <td>2008</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "      <td>223500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>70</td>\n",
       "      <td>RL</td>\n",
       "      <td>60.0</td>\n",
       "      <td>9550</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>IR1</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2006</td>\n",
       "      <td>WD</td>\n",
       "      <td>Abnorml</td>\n",
       "      <td>140000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>60</td>\n",
       "      <td>RL</td>\n",
       "      <td>84.0</td>\n",
       "      <td>14260</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>IR1</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>12</td>\n",
       "      <td>2008</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "      <td>250000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>6</td>\n",
       "      <td>50</td>\n",
       "      <td>RL</td>\n",
       "      <td>85.0</td>\n",
       "      <td>14115</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>IR1</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>MnPrv</td>\n",
       "      <td>Shed</td>\n",
       "      <td>700</td>\n",
       "      <td>10</td>\n",
       "      <td>2009</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "      <td>143000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>7</td>\n",
       "      <td>20</td>\n",
       "      <td>RL</td>\n",
       "      <td>75.0</td>\n",
       "      <td>10084</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Reg</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "      <td>2007</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "      <td>307000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>8</td>\n",
       "      <td>60</td>\n",
       "      <td>RL</td>\n",
       "      <td>NaN</td>\n",
       "      <td>10382</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>IR1</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Shed</td>\n",
       "      <td>350</td>\n",
       "      <td>11</td>\n",
       "      <td>2009</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "      <td>200000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>9</td>\n",
       "      <td>50</td>\n",
       "      <td>RM</td>\n",
       "      <td>51.0</td>\n",
       "      <td>6120</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Reg</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>2008</td>\n",
       "      <td>WD</td>\n",
       "      <td>Abnorml</td>\n",
       "      <td>129900</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>10</td>\n",
       "      <td>190</td>\n",
       "      <td>RL</td>\n",
       "      <td>50.0</td>\n",
       "      <td>7420</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Reg</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2008</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "      <td>118000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>11</td>\n",
       "      <td>20</td>\n",
       "      <td>RL</td>\n",
       "      <td>70.0</td>\n",
       "      <td>11200</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Reg</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2008</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "      <td>129500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>12</td>\n",
       "      <td>60</td>\n",
       "      <td>RL</td>\n",
       "      <td>85.0</td>\n",
       "      <td>11924</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>IR1</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>7</td>\n",
       "      <td>2006</td>\n",
       "      <td>New</td>\n",
       "      <td>Partial</td>\n",
       "      <td>345000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>RL</td>\n",
       "      <td>NaN</td>\n",
       "      <td>12968</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>IR2</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>9</td>\n",
       "      <td>2008</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "      <td>144000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>14</td>\n",
       "      <td>20</td>\n",
       "      <td>RL</td>\n",
       "      <td>91.0</td>\n",
       "      <td>10652</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>IR1</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "      <td>2007</td>\n",
       "      <td>New</td>\n",
       "      <td>Partial</td>\n",
       "      <td>279500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>15</td>\n",
       "      <td>20</td>\n",
       "      <td>RL</td>\n",
       "      <td>NaN</td>\n",
       "      <td>10920</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>IR1</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>GdWo</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>2008</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "      <td>157000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>16</td>\n",
       "      <td>45</td>\n",
       "      <td>RM</td>\n",
       "      <td>51.0</td>\n",
       "      <td>6120</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Reg</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>GdPrv</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>7</td>\n",
       "      <td>2007</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "      <td>132000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>17</td>\n",
       "      <td>20</td>\n",
       "      <td>RL</td>\n",
       "      <td>NaN</td>\n",
       "      <td>11241</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>IR1</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Shed</td>\n",
       "      <td>700</td>\n",
       "      <td>3</td>\n",
       "      <td>2010</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "      <td>149000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>18</td>\n",
       "      <td>90</td>\n",
       "      <td>RL</td>\n",
       "      <td>72.0</td>\n",
       "      <td>10791</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Reg</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Shed</td>\n",
       "      <td>500</td>\n",
       "      <td>10</td>\n",
       "      <td>2006</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "      <td>90000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>19</td>\n",
       "      <td>20</td>\n",
       "      <td>RL</td>\n",
       "      <td>66.0</td>\n",
       "      <td>13695</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Reg</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>2008</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "      <td>159000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>RL</td>\n",
       "      <td>70.0</td>\n",
       "      <td>7560</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Reg</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>MnPrv</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>2009</td>\n",
       "      <td>COD</td>\n",
       "      <td>Abnorml</td>\n",
       "      <td>139000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>20 rows × 81 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "    Id  MSSubClass MSZoning  LotFrontage  LotArea Street Alley LotShape  \\\n",
       "0    1          60       RL         65.0     8450   Pave   NaN      Reg   \n",
       "1    2          20       RL         80.0     9600   Pave   NaN      Reg   \n",
       "2    3          60       RL         68.0    11250   Pave   NaN      IR1   \n",
       "3    4          70       RL         60.0     9550   Pave   NaN      IR1   \n",
       "4    5          60       RL         84.0    14260   Pave   NaN      IR1   \n",
       "5    6          50       RL         85.0    14115   Pave   NaN      IR1   \n",
       "6    7          20       RL         75.0    10084   Pave   NaN      Reg   \n",
       "7    8          60       RL          NaN    10382   Pave   NaN      IR1   \n",
       "8    9          50       RM         51.0     6120   Pave   NaN      Reg   \n",
       "9   10         190       RL         50.0     7420   Pave   NaN      Reg   \n",
       "10  11          20       RL         70.0    11200   Pave   NaN      Reg   \n",
       "11  12          60       RL         85.0    11924   Pave   NaN      IR1   \n",
       "12  13          20       RL          NaN    12968   Pave   NaN      IR2   \n",
       "13  14          20       RL         91.0    10652   Pave   NaN      IR1   \n",
       "14  15          20       RL          NaN    10920   Pave   NaN      IR1   \n",
       "15  16          45       RM         51.0     6120   Pave   NaN      Reg   \n",
       "16  17          20       RL          NaN    11241   Pave   NaN      IR1   \n",
       "17  18          90       RL         72.0    10791   Pave   NaN      Reg   \n",
       "18  19          20       RL         66.0    13695   Pave   NaN      Reg   \n",
       "19  20          20       RL         70.0     7560   Pave   NaN      Reg   \n",
       "\n",
       "   LandContour Utilities  ... PoolArea PoolQC  Fence MiscFeature MiscVal  \\\n",
       "0          Lvl    AllPub  ...        0    NaN    NaN         NaN       0   \n",
       "1          Lvl    AllPub  ...        0    NaN    NaN         NaN       0   \n",
       "2          Lvl    AllPub  ...        0    NaN    NaN         NaN       0   \n",
       "3          Lvl    AllPub  ...        0    NaN    NaN         NaN       0   \n",
       "4          Lvl    AllPub  ...        0    NaN    NaN         NaN       0   \n",
       "5          Lvl    AllPub  ...        0    NaN  MnPrv        Shed     700   \n",
       "6          Lvl    AllPub  ...        0    NaN    NaN         NaN       0   \n",
       "7          Lvl    AllPub  ...        0    NaN    NaN        Shed     350   \n",
       "8          Lvl    AllPub  ...        0    NaN    NaN         NaN       0   \n",
       "9          Lvl    AllPub  ...        0    NaN    NaN         NaN       0   \n",
       "10         Lvl    AllPub  ...        0    NaN    NaN         NaN       0   \n",
       "11         Lvl    AllPub  ...        0    NaN    NaN         NaN       0   \n",
       "12         Lvl    AllPub  ...        0    NaN    NaN         NaN       0   \n",
       "13         Lvl    AllPub  ...        0    NaN    NaN         NaN       0   \n",
       "14         Lvl    AllPub  ...        0    NaN   GdWo         NaN       0   \n",
       "15         Lvl    AllPub  ...        0    NaN  GdPrv         NaN       0   \n",
       "16         Lvl    AllPub  ...        0    NaN    NaN        Shed     700   \n",
       "17         Lvl    AllPub  ...        0    NaN    NaN        Shed     500   \n",
       "18         Lvl    AllPub  ...        0    NaN    NaN         NaN       0   \n",
       "19         Lvl    AllPub  ...        0    NaN  MnPrv         NaN       0   \n",
       "\n",
       "   MoSold YrSold  SaleType  SaleCondition  SalePrice  \n",
       "0       2   2008        WD         Normal     208500  \n",
       "1       5   2007        WD         Normal     181500  \n",
       "2       9   2008        WD         Normal     223500  \n",
       "3       2   2006        WD        Abnorml     140000  \n",
       "4      12   2008        WD         Normal     250000  \n",
       "5      10   2009        WD         Normal     143000  \n",
       "6       8   2007        WD         Normal     307000  \n",
       "7      11   2009        WD         Normal     200000  \n",
       "8       4   2008        WD        Abnorml     129900  \n",
       "9       1   2008        WD         Normal     118000  \n",
       "10      2   2008        WD         Normal     129500  \n",
       "11      7   2006       New        Partial     345000  \n",
       "12      9   2008        WD         Normal     144000  \n",
       "13      8   2007       New        Partial     279500  \n",
       "14      5   2008        WD         Normal     157000  \n",
       "15      7   2007        WD         Normal     132000  \n",
       "16      3   2010        WD         Normal     149000  \n",
       "17     10   2006        WD         Normal      90000  \n",
       "18      6   2008        WD         Normal     159000  \n",
       "19      5   2009       COD        Abnorml     139000  \n",
       "\n",
       "[20 rows x 81 columns]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train.head(20)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "73d9ae93",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Id</th>\n",
       "      <th>MSSubClass</th>\n",
       "      <th>MSZoning</th>\n",
       "      <th>LotFrontage</th>\n",
       "      <th>LotArea</th>\n",
       "      <th>Street</th>\n",
       "      <th>Alley</th>\n",
       "      <th>LotShape</th>\n",
       "      <th>LandContour</th>\n",
       "      <th>Utilities</th>\n",
       "      <th>...</th>\n",
       "      <th>ScreenPorch</th>\n",
       "      <th>PoolArea</th>\n",
       "      <th>PoolQC</th>\n",
       "      <th>Fence</th>\n",
       "      <th>MiscFeature</th>\n",
       "      <th>MiscVal</th>\n",
       "      <th>MoSold</th>\n",
       "      <th>YrSold</th>\n",
       "      <th>SaleType</th>\n",
       "      <th>SaleCondition</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1461</td>\n",
       "      <td>20</td>\n",
       "      <td>RH</td>\n",
       "      <td>80.0</td>\n",
       "      <td>11622</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Reg</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>...</td>\n",
       "      <td>120</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>MnPrv</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>2010</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1462</td>\n",
       "      <td>20</td>\n",
       "      <td>RL</td>\n",
       "      <td>81.0</td>\n",
       "      <td>14267</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>IR1</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Gar2</td>\n",
       "      <td>12500</td>\n",
       "      <td>6</td>\n",
       "      <td>2010</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1463</td>\n",
       "      <td>60</td>\n",
       "      <td>RL</td>\n",
       "      <td>74.0</td>\n",
       "      <td>13830</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>IR1</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>MnPrv</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>2010</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1464</td>\n",
       "      <td>60</td>\n",
       "      <td>RL</td>\n",
       "      <td>78.0</td>\n",
       "      <td>9978</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>IR1</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>2010</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1465</td>\n",
       "      <td>120</td>\n",
       "      <td>RL</td>\n",
       "      <td>43.0</td>\n",
       "      <td>5005</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>IR1</td>\n",
       "      <td>HLS</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>...</td>\n",
       "      <td>144</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2010</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>1466</td>\n",
       "      <td>60</td>\n",
       "      <td>RL</td>\n",
       "      <td>75.0</td>\n",
       "      <td>10000</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>IR1</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>2010</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>1467</td>\n",
       "      <td>20</td>\n",
       "      <td>RL</td>\n",
       "      <td>NaN</td>\n",
       "      <td>7980</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>IR1</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>GdPrv</td>\n",
       "      <td>Shed</td>\n",
       "      <td>500</td>\n",
       "      <td>3</td>\n",
       "      <td>2010</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>1468</td>\n",
       "      <td>60</td>\n",
       "      <td>RL</td>\n",
       "      <td>63.0</td>\n",
       "      <td>8402</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>IR1</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>2010</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>1469</td>\n",
       "      <td>20</td>\n",
       "      <td>RL</td>\n",
       "      <td>85.0</td>\n",
       "      <td>10176</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Reg</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2010</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>1470</td>\n",
       "      <td>20</td>\n",
       "      <td>RL</td>\n",
       "      <td>70.0</td>\n",
       "      <td>8400</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Reg</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>MnPrv</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>2010</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>10 rows × 80 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     Id  MSSubClass MSZoning  LotFrontage  LotArea Street Alley LotShape  \\\n",
       "0  1461          20       RH         80.0    11622   Pave   NaN      Reg   \n",
       "1  1462          20       RL         81.0    14267   Pave   NaN      IR1   \n",
       "2  1463          60       RL         74.0    13830   Pave   NaN      IR1   \n",
       "3  1464          60       RL         78.0     9978   Pave   NaN      IR1   \n",
       "4  1465         120       RL         43.0     5005   Pave   NaN      IR1   \n",
       "5  1466          60       RL         75.0    10000   Pave   NaN      IR1   \n",
       "6  1467          20       RL          NaN     7980   Pave   NaN      IR1   \n",
       "7  1468          60       RL         63.0     8402   Pave   NaN      IR1   \n",
       "8  1469          20       RL         85.0    10176   Pave   NaN      Reg   \n",
       "9  1470          20       RL         70.0     8400   Pave   NaN      Reg   \n",
       "\n",
       "  LandContour Utilities  ... ScreenPorch PoolArea PoolQC  Fence MiscFeature  \\\n",
       "0         Lvl    AllPub  ...         120        0    NaN  MnPrv         NaN   \n",
       "1         Lvl    AllPub  ...           0        0    NaN    NaN        Gar2   \n",
       "2         Lvl    AllPub  ...           0        0    NaN  MnPrv         NaN   \n",
       "3         Lvl    AllPub  ...           0        0    NaN    NaN         NaN   \n",
       "4         HLS    AllPub  ...         144        0    NaN    NaN         NaN   \n",
       "5         Lvl    AllPub  ...           0        0    NaN    NaN         NaN   \n",
       "6         Lvl    AllPub  ...           0        0    NaN  GdPrv        Shed   \n",
       "7         Lvl    AllPub  ...           0        0    NaN    NaN         NaN   \n",
       "8         Lvl    AllPub  ...           0        0    NaN    NaN         NaN   \n",
       "9         Lvl    AllPub  ...           0        0    NaN  MnPrv         NaN   \n",
       "\n",
       "  MiscVal MoSold  YrSold  SaleType  SaleCondition  \n",
       "0       0      6    2010        WD         Normal  \n",
       "1   12500      6    2010        WD         Normal  \n",
       "2       0      3    2010        WD         Normal  \n",
       "3       0      6    2010        WD         Normal  \n",
       "4       0      1    2010        WD         Normal  \n",
       "5       0      4    2010        WD         Normal  \n",
       "6     500      3    2010        WD         Normal  \n",
       "7       0      5    2010        WD         Normal  \n",
       "8       0      2    2010        WD         Normal  \n",
       "9       0      4    2010        WD         Normal  \n",
       "\n",
       "[10 rows x 80 columns]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "a9bae839",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(1460, 81)"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "c3668205",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(1459, 80)"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "d2ae89c1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<bound method DataFrame.info of         Id  MSSubClass MSZoning  LotFrontage  LotArea Street Alley LotShape  \\\n",
       "0        1          60       RL         65.0     8450   Pave   NaN      Reg   \n",
       "1        2          20       RL         80.0     9600   Pave   NaN      Reg   \n",
       "2        3          60       RL         68.0    11250   Pave   NaN      IR1   \n",
       "3        4          70       RL         60.0     9550   Pave   NaN      IR1   \n",
       "4        5          60       RL         84.0    14260   Pave   NaN      IR1   \n",
       "...    ...         ...      ...          ...      ...    ...   ...      ...   \n",
       "1455  1456          60       RL         62.0     7917   Pave   NaN      Reg   \n",
       "1456  1457          20       RL         85.0    13175   Pave   NaN      Reg   \n",
       "1457  1458          70       RL         66.0     9042   Pave   NaN      Reg   \n",
       "1458  1459          20       RL         68.0     9717   Pave   NaN      Reg   \n",
       "1459  1460          20       RL         75.0     9937   Pave   NaN      Reg   \n",
       "\n",
       "     LandContour Utilities  ... PoolArea PoolQC  Fence MiscFeature MiscVal  \\\n",
       "0            Lvl    AllPub  ...        0    NaN    NaN         NaN       0   \n",
       "1            Lvl    AllPub  ...        0    NaN    NaN         NaN       0   \n",
       "2            Lvl    AllPub  ...        0    NaN    NaN         NaN       0   \n",
       "3            Lvl    AllPub  ...        0    NaN    NaN         NaN       0   \n",
       "4            Lvl    AllPub  ...        0    NaN    NaN         NaN       0   \n",
       "...          ...       ...  ...      ...    ...    ...         ...     ...   \n",
       "1455         Lvl    AllPub  ...        0    NaN    NaN         NaN       0   \n",
       "1456         Lvl    AllPub  ...        0    NaN  MnPrv         NaN       0   \n",
       "1457         Lvl    AllPub  ...        0    NaN  GdPrv        Shed    2500   \n",
       "1458         Lvl    AllPub  ...        0    NaN    NaN         NaN       0   \n",
       "1459         Lvl    AllPub  ...        0    NaN    NaN         NaN       0   \n",
       "\n",
       "     MoSold YrSold  SaleType  SaleCondition  SalePrice  \n",
       "0         2   2008        WD         Normal     208500  \n",
       "1         5   2007        WD         Normal     181500  \n",
       "2         9   2008        WD         Normal     223500  \n",
       "3         2   2006        WD        Abnorml     140000  \n",
       "4        12   2008        WD         Normal     250000  \n",
       "...     ...    ...       ...            ...        ...  \n",
       "1455      8   2007        WD         Normal     175000  \n",
       "1456      2   2010        WD         Normal     210000  \n",
       "1457      5   2010        WD         Normal     266500  \n",
       "1458      4   2010        WD         Normal     142125  \n",
       "1459      6   2008        WD         Normal     147500  \n",
       "\n",
       "[1460 rows x 81 columns]>"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train.info"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "id": "27801979",
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Id                  0\n",
      "MSSubClass          0\n",
      "MSZoning            0\n",
      "LotFrontage       259\n",
      "LotArea             0\n",
      "Street              0\n",
      "Alley            1369\n",
      "LotShape            0\n",
      "LandContour         0\n",
      "Utilities           0\n",
      "LotConfig           0\n",
      "LandSlope           0\n",
      "Neighborhood        0\n",
      "Condition1          0\n",
      "Condition2          0\n",
      "BldgType            0\n",
      "HouseStyle          0\n",
      "OverallQual         0\n",
      "OverallCond         0\n",
      "YearBuilt           0\n",
      "YearRemodAdd        0\n",
      "RoofStyle           0\n",
      "RoofMatl            0\n",
      "Exterior1st         0\n",
      "Exterior2nd         0\n",
      "MasVnrType          8\n",
      "MasVnrArea          8\n",
      "ExterQual           0\n",
      "ExterCond           0\n",
      "Foundation          0\n",
      "BsmtQual           37\n",
      "BsmtCond           37\n",
      "BsmtExposure       38\n",
      "BsmtFinType1       37\n",
      "BsmtFinSF1          0\n",
      "BsmtFinType2       38\n",
      "BsmtFinSF2          0\n",
      "BsmtUnfSF           0\n",
      "TotalBsmtSF         0\n",
      "Heating             0\n",
      "HeatingQC           0\n",
      "CentralAir          0\n",
      "Electrical          1\n",
      "1stFlrSF            0\n",
      "2ndFlrSF            0\n",
      "LowQualFinSF        0\n",
      "GrLivArea           0\n",
      "BsmtFullBath        0\n",
      "BsmtHalfBath        0\n",
      "FullBath            0\n",
      "HalfBath            0\n",
      "BedroomAbvGr        0\n",
      "KitchenAbvGr        0\n",
      "KitchenQual         0\n",
      "TotRmsAbvGrd        0\n",
      "Functional          0\n",
      "Fireplaces          0\n",
      "FireplaceQu       690\n",
      "GarageType         81\n",
      "GarageYrBlt        81\n",
      "GarageFinish       81\n",
      "GarageCars          0\n",
      "GarageArea          0\n",
      "GarageQual         81\n",
      "GarageCond         81\n",
      "PavedDrive          0\n",
      "WoodDeckSF          0\n",
      "OpenPorchSF         0\n",
      "EnclosedPorch       0\n",
      "3SsnPorch           0\n",
      "ScreenPorch         0\n",
      "PoolArea            0\n",
      "PoolQC           1453\n",
      "Fence            1179\n",
      "MiscFeature      1406\n",
      "MiscVal             0\n",
      "MoSold              0\n",
      "YrSold              0\n",
      "SaleType            0\n",
      "SaleCondition       0\n",
      "SalePrice           0\n",
      "dtype: int64\n"
     ]
    }
   ],
   "source": [
    "# 设置全部展示数据，不显示省略号\n",
    "pd.set_option('display.max_rows',None)\n",
    "pd.set_option('display.max_columns',None)\n",
    "\n",
    "print(train.isnull().sum())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "id": "2d3a8fde",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "MSSubClass\n",
       "20     185224.811567\n",
       "30      95829.724638\n",
       "40     156125.000000\n",
       "45     108591.666667\n",
       "50     143302.972222\n",
       "60     239948.501672\n",
       "70     166772.416667\n",
       "75     192437.500000\n",
       "80     169736.551724\n",
       "85     147810.000000\n",
       "90     133541.076923\n",
       "120    200779.080460\n",
       "160    138647.380952\n",
       "180    102300.000000\n",
       "190    129613.333333\n",
       "Name: SalePrice, dtype: float64"
      ]
     },
     "execution_count": 87,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = train.groupby('MSSubClass')\n",
    "df['SalePrice'].mean()\n",
    "\n",
    "# train[train['MSSubClass']==40]['SalePrice'].mean()\n",
    "\n",
    "# groupby data = [[20,train[train['MSSubClass']==20]['SalePrice'].sum()/train[train['MSSubClass']==20]['SalePrice'].count()],\n",
    "#        [20,train[train['MSSubClass']==20]['SalePrice'].sum()/train[train['MSSubClass']==20]['SalePrice'].count()],\n",
    "#        [20,train[train['MSSubClass']==20]['SalePrice'].sum()/train[train['MSSubClass']==20]['SalePrice'].count()]\n",
    "#        [20,train[train['MSSubClass']==20]['SalePrice'].sum()/train[train['MSSubClass']==20]['SalePrice'].count()]\n",
    "#        [20,train[train['MSSubClass']==20]['SalePrice'].sum()/train[train['MSSubClass']==20]['SalePrice'].count()]\n",
    "#        [20,train[train['MSSubClass']==20]['SalePrice'].sum()/train[train['MSSubClass']==20]['SalePrice'].count()]\n",
    "#        [20,train[train['MSSubClass']==20]['SalePrice'].sum()/train[train['MSSubClass']==20]['SalePrice'].count()]]\n",
    "# df = pd.DataFrame(data,columns=['MSSubClass','SalePrice'],dtype=int)\n",
    "# df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "id": "31f0019f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "MSZoning\n",
       "C (all)     74528.000000\n",
       "FV         214014.061538\n",
       "RH         131558.375000\n",
       "RL         191004.994787\n",
       "RM         126316.830275\n",
       "Name: SalePrice, dtype: float64"
      ]
     },
     "execution_count": 84,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = train.groupby('MSZoning')\n",
    "df['SalePrice'].mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "id": "de8543d2",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Street\n",
       "Grvl    130190.500000\n",
       "Pave    181130.538514\n",
       "Name: SalePrice, dtype: float64"
      ]
     },
     "execution_count": 88,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = train.groupby('Street')\n",
    "df['SalePrice'].mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 90,
   "id": "0f1d4078",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Alley\n",
       "Grvl    122219.080000\n",
       "Pave    168000.585366\n",
       "Name: SalePrice, dtype: float64"
      ]
     },
     "execution_count": 90,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = train.groupby('Alley')\n",
    "df['SalePrice'].mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "id": "b5c818a0",
   "metadata": {},
   "outputs": [
    {
     "ename": "KeyError",
     "evalue": "'SalePrice'",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
      "File \u001b[1;32mD:\\anaconda\\lib\\site-packages\\pandas\\core\\indexes\\base.py:3802\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[1;34m(self, key, method, tolerance)\u001b[0m\n\u001b[0;32m   3801\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m-> 3802\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_engine\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcasted_key\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m   3803\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n",
      "File \u001b[1;32mD:\\anaconda\\lib\\site-packages\\pandas\\_libs\\index.pyx:138\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[1;34m()\u001b[0m\n",
      "File \u001b[1;32mD:\\anaconda\\lib\\site-packages\\pandas\\_libs\\index.pyx:146\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[1;34m()\u001b[0m\n",
      "File \u001b[1;32mpandas\\_libs\\index_class_helper.pxi:49\u001b[0m, in \u001b[0;36mpandas._libs.index.Int64Engine._check_type\u001b[1;34m()\u001b[0m\n",
      "\u001b[1;31mKeyError\u001b[0m: 'SalePrice'",
      "\nThe above exception was the direct cause of the following exception:\n",
      "\u001b[1;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
      "Cell \u001b[1;32mIn[99], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m df \u001b[38;5;241m=\u001b[39m \u001b[43mtrain\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mLotFrontage\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msort_values\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mSalePrice\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\n",
      "File \u001b[1;32mD:\\anaconda\\lib\\site-packages\\pandas\\core\\series.py:981\u001b[0m, in \u001b[0;36mSeries.__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m    978\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_values[key]\n\u001b[0;32m    980\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m key_is_scalar:\n\u001b[1;32m--> 981\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_get_value\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m    983\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_hashable(key):\n\u001b[0;32m    984\u001b[0m     \u001b[38;5;66;03m# Otherwise index.get_value will raise InvalidIndexError\u001b[39;00m\n\u001b[0;32m    985\u001b[0m     \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m    986\u001b[0m         \u001b[38;5;66;03m# For labels that don't resolve as scalars like tuples and frozensets\u001b[39;00m\n",
      "File \u001b[1;32mD:\\anaconda\\lib\\site-packages\\pandas\\core\\series.py:1089\u001b[0m, in \u001b[0;36mSeries._get_value\u001b[1;34m(self, label, takeable)\u001b[0m\n\u001b[0;32m   1086\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_values[label]\n\u001b[0;32m   1088\u001b[0m \u001b[38;5;66;03m# Similar to Index.get_value, but we do not fall back to positional\u001b[39;00m\n\u001b[1;32m-> 1089\u001b[0m loc \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mindex\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlabel\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m   1090\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mindex\u001b[38;5;241m.\u001b[39m_get_values_for_loc(\u001b[38;5;28mself\u001b[39m, loc, label)\n",
      "File \u001b[1;32mD:\\anaconda\\lib\\site-packages\\pandas\\core\\indexes\\base.py:3804\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[1;34m(self, key, method, tolerance)\u001b[0m\n\u001b[0;32m   3802\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_engine\u001b[38;5;241m.\u001b[39mget_loc(casted_key)\n\u001b[0;32m   3803\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n\u001b[1;32m-> 3804\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01merr\u001b[39;00m\n\u001b[0;32m   3805\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m:\n\u001b[0;32m   3806\u001b[0m     \u001b[38;5;66;03m# If we have a listlike key, _check_indexing_error will raise\u001b[39;00m\n\u001b[0;32m   3807\u001b[0m     \u001b[38;5;66;03m#  InvalidIndexError. Otherwise we fall through and re-raise\u001b[39;00m\n\u001b[0;32m   3808\u001b[0m     \u001b[38;5;66;03m#  the TypeError.\u001b[39;00m\n\u001b[0;32m   3809\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_check_indexing_error(key)\n",
      "\u001b[1;31mKeyError\u001b[0m: 'SalePrice'"
     ]
    }
   ],
   "source": [
    "df = train"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
